SGI Developer Toolbox 6.1

home *** CD-ROM | disk | FTP | other *** search

/ SGI Developer Toolbox 6.1 / SGI Developer Toolbox 6.1 - Disc 4.iso / src / exampleCode / MP / timer / regular / linpackd.l < prev next >

Wrap

Text File | 1994-08-02 | 70.8 KB | 1,058 lines

PFA/SGI 10.0 k092805 910529 _MAIN Source 10-Aug-1993 10:08:55 Page 1 Footnotes Actions DO Loops Line DIR 1 # 1 "linpackd.f" 2 * 3 *PLEASE NOTE THAT netlib HAS MOVED, THE NEW ADDRESS IS netlib@ornl.gov. 4 *THE OLD ADDRESS, netlib@mcs.anl.gov, WILL BE TURNED OFF SOON. 5 * 6 *** from netlib, Fri Jul 27 14:07:10 EDT 1990 *** 7 double precision second 8 double precision aa(200,200),a(201,200),b(200),x(200) 9 double precision time(8,6),cray,ops,total,norma,normx 10 double precision resid,residn,eps,epslon 11 integer ipvt(200) 12 lda = 201 13 ldaa = 200 14 c 15 n = 100 16 cray = .056 17 write(6,1) 18 1 format(' Please send the results of this run to:'// 19 $ ' Jack J. Dongarra'/ 20 $ ' Computer Science Department'/ 21 $ ' University of Tennessee'/ 22 $ ' Knoxville, Tennessee 37996-1300'// 23 $ ' Fax: 615-974-8296'// 24 $ ' Internet: dongarra@cs.utk.edu'/) SO 25 ops = (2.0d0*n**3)/3.0d0 + 2.0d0*n**2 26 c 27 call matgen(a,lda,n,b,norma) 28 t1 = second() 29 call dgefa(a,lda,n,ipvt,info) 30 time(1,1) = second() - t1 31 t1 = second() 32 call dgesl(a,lda,n,ipvt,b,0) 33 time(1,2) = second() - t1 34 total = time(1,1) + time(1,2) 35 c 36 c compute a residual to verify results. 37 c 1 2 SO C +--------- 38 do 10 i = 1,n * 39 x(i) = b(i) *_________ 40 10 continue 41 call matgen(a,lda,n,b,norma) 1 2 SO C +--------- 42 do 20 i = 1,n SO * 43 b(i) = -b(i) *_________ 44 20 continue 45 call dmxpy(n,b,n,lda,x,a) 46 resid = 0.0 47 normx = 0.0 1 2 SO +--------- 48 do 30 i = 1,n 3 DD ! 49 resid = dmax1( resid, dabs(b(i)) ) 4 DD ! 50 normx = dmax1( normx, dabs(x(i)) ) !_________ 51 30 continue PFA/SGI 10.0 k092805 910529 _MAIN Source 10-Aug-1993 10:08:55 Page 2 52 eps = epslon(1.0d0) 53 residn = resid/( n*norma*normx*eps ) 54 write(6,40) 55 40 format(' norm. resid resid machep', 56 $ ' x(1) x(n)') 57 write(6,50) residn,resid,eps,x(1),x(n) 58 50 format(1p5e16.8) 59 c 60 write(6,60) n 61 60 format(//' times are reported for matrices of order ',i5) 62 write(6,70) 63 70 format(6x,'dgefa',6x,'dgesl',6x,'total',5x,'mflops',7x,'unit', 64 $ 6x,'ratio') 65 c 66 time(1,3) = total 67 time(1,4) = ops/(1.0d6*total) 68 time(1,5) = 2.0d0/time(1,4) 69 time(1,6) = total/cray 70 write(6,80) lda 71 80 format(' times for array with leading dimension of',i4) 72 write(6,110) (time(1,i),i=1,6) 73 c 74 call matgen(a,lda,n,b,norma) 75 t1 = second() 76 call dgefa(a,lda,n,ipvt,info) 77 time(2,1) = second() - t1 78 t1 = second() 79 call dgesl(a,lda,n,ipvt,b,0) 80 time(2,2) = second() - t1 81 total = time(2,1) + time(2,2) 82 time(2,3) = total 83 time(2,4) = ops/(1.0d6*total) 84 time(2,5) = 2.0d0/time(2,4) 85 time(2,6) = total/cray 86 c 87 call matgen(a,lda,n,b,norma) 88 t1 = second() 89 call dgefa(a,lda,n,ipvt,info) 90 time(3,1) = second() - t1 91 t1 = second() 92 call dgesl(a,lda,n,ipvt,b,0) 93 time(3,2) = second() - t1 94 total = time(3,1) + time(3,2) 95 time(3,3) = total 96 time(3,4) = ops/(1.0d6*total) 97 time(3,5) = 2.0d0/time(3,4) 98 time(3,6) = total/cray 99 c 5 SO 100 ntimes = 10 101 tm2 = 0 102 t1 = second() SO +--------- 103 do 90 i = 1,ntimes 6 NO NCS ! 104 tm = second() PFA/SGI 10.0 k092805 910529 _MAIN Source 10-Aug-1993 10:08:55 Page 3 7 8 9 10 11 12 NO DD NCS ! 105 call matgen(a,lda,n,b,norma) 6 13 NO DD NCS ! 106 tm2 = tm2 + second() - tm 7 8 9 14 15 16 NO DD NCS ! 107 call dgefa(a,lda,n,ipvt,info) !_________ 108 90 continue SO 109 time(4,1) = (second() - t1 - tm2)/ntimes 110 t1 = second() SO +--------- 111 do 100 i = 1,ntimes 7 8 9 11 15 17 NO DD NCS ! 112 call dgesl(a,lda,n,ipvt,b,0) !_________ 113 100 continue SO 114 time(4,2) = (second() - t1)/ntimes 115 total = time(4,1) + time(4,2) 116 time(4,3) = total 117 time(4,4) = ops/(1.0d6*total) 118 time(4,5) = 2.0d0/time(4,4) 119 time(4,6) = total/cray 120 c 121 write(6,110) (time(2,i),i=1,6) 122 write(6,110) (time(3,i),i=1,6) 123 write(6,110) (time(4,i),i=1,6) 124 110 format(6(1pe11.3)) 125 c 126 call matgen(aa,ldaa,n,b,norma) 127 t1 = second() 128 call dgefa(aa,ldaa,n,ipvt,info) 129 time(5,1) = second() - t1 130 t1 = second() 131 call dgesl(aa,ldaa,n,ipvt,b,0) 132 time(5,2) = second() - t1 133 total = time(5,1) + time(5,2) 134 time(5,3) = total 135 time(5,4) = ops/(1.0d6*total) 136 time(5,5) = 2.0d0/time(5,4) 137 time(5,6) = total/cray 138 c 139 call matgen(aa,ldaa,n,b,norma) 140 t1 = second() 141 call dgefa(aa,ldaa,n,ipvt,info) 142 time(6,1) = second() - t1 143 t1 = second() 144 call dgesl(aa,ldaa,n,ipvt,b,0) 145 time(6,2) = second() - t1 146 total = time(6,1) + time(6,2) 147 time(6,3) = total 148 time(6,4) = ops/(1.0d6*total) 149 time(6,5) = 2.0d0/time(6,4) 150 time(6,6) = total/cray 151 c 152 call matgen(aa,ldaa,n,b,norma) 153 t1 = second() 154 call dgefa(aa,ldaa,n,ipvt,info) PFA/SGI 10.0 k092805 910529 _MAIN Source 10-Aug-1993 10:08:55 Page 4 155 time(7,1) = second() - t1 156 t1 = second() 157 call dgesl(aa,ldaa,n,ipvt,b,0) 158 time(7,2) = second() - t1 159 total = time(7,1) + time(7,2) 160 time(7,3) = total 161 time(7,4) = ops/(1.0d6*total) 162 time(7,5) = 2.0d0/time(7,4) 163 time(7,6) = total/cray 164 c 5 SO 165 ntimes = 10 166 tm2 = 0 167 t1 = second() SO +--------- 168 do 120 i = 1,ntimes 6 NO NCS ! 169 tm = second() 7 10 11 12 18 19 NO DD NCS ! 170 call matgen(aa,ldaa,n,b,norma) 6 13 NO DD NCS ! 171 tm2 = tm2 + second() - tm 7 14 15 16 18 19 NO DD NCS ! 172 call dgefa(aa,ldaa,n,ipvt,info) !_________ 173 120 continue SO 174 time(8,1) = (second() - t1 - tm2)/ntimes 175 t1 = second() SO +--------- 176 do 130 i = 1,ntimes 7 11 15 17 18 19 NO DD NCS ! 177 call dgesl(aa,ldaa,n,ipvt,b,0) !_________ 178 130 continue SO 179 time(8,2) = (second() - t1)/ntimes 180 total = time(8,1) + time(8,2) 181 time(8,3) = total 182 time(8,4) = ops/(1.0d6*total) 183 time(8,5) = 2.0d0/time(8,4) 184 time(8,6) = total/cray 185 c 186 write(6,140) ldaa 187 140 format(/' times for array with leading dimension of',i4) 188 write(6,110) (time(5,i),i=1,6) 189 write(6,110) (time(6,i),i=1,6) 190 write(6,110) (time(7,i),i=1,6) 191 write(6,110) (time(8,i),i=1,6) 192 stop 193 end Abbreviations Used NO not optimized DD data dependence SO scalar optimization DIR directive NCS non-concurrent-stmt C concurrentized Footnote List PFA/SGI 10.0 k092805 910529 _MAIN Source 10-Aug-1993 10:08:55 Page 5 1: scalar optimization Loop unrolled 4 times to improve scalar performance. 2: scalar optimization Cleanup loop for loop unrolling. 3: data dependence Data dependence involving this line due to variable "RESID". 4: data dependence Data dependence involving this line due to variable "NORMX". 5: scalar optimization Statement deleted because of scalar optimization. 6: not optimized Unoptimizable call to "SECOND" found. 7: data dependence Data dependence involving this line due to variable "N". 8: data dependence Data dependence involving this line due to variable "LDA". 9: data dependence Data dependence involving this line due to variable "A". 10: data dependence Data dependence involving this line due to variable "NORMA". 11: data dependence Data dependence involving this line due to variable "B". 12: not optimized Unoptimizable call to "MATGEN" found. 13: data dependence Data dependence involving this line due to variable "TM2". 14: data dependence Data dependence involving this line due to variable "INFO". 15: data dependence Data dependence involving this line due to variable "IPVT". 16: not optimized Unoptimizable call to "DGEFA" found. 17: not optimized Unoptimizable call to "DGESL" found. 18: data dependence Data dependence involving this line due to variable "LDAA". 19: data dependence Data dependence involving this line due to variable "AA". PFA/SGI 10.0 k092805 910529 _MAIN Loop Summary 10-Aug-1993 10:08:55 Page 6 Loop Summary From To Loop Loop at Unroll Unroll Iteration Loop# line line label index nest weight factor workload Status 1 38 40 Do 10 I 1 3 4 scalar mode preferable 2 38 40 Do 10 I 1 3 4 3 concurrentized 3 42 44 Do 20 I 1 4 4 scalar mode preferable 4 42 44 Do 20 I 1 4 4 4 concurrentized 5 48 51 Do 30 I 1 12 4 unrolled 6 48 51 Do 30 I 1 12 4 unrolled 7 103 108 Do 90 I 1 204 1 unoptimizable call (DGEFA) 8 111 113 Do 100 I 1 50 1 unoptimizable call (DGESL) 9 168 173 Do 120 I 1 204 1 unoptimizable call (DGEFA) 10 176 178 Do 130 I 1 50 1 unoptimizable call (DGESL) PFA/SGI 10.0 k092805 910529 MATGEN Source 10-Aug-1993 10:08:55 Page 7 Footnotes Actions DO Loops Line 194 subroutine matgen(a,lda,n,b,norma) 195 double precision a(lda,1),b(1),norma 196 c 197 init = 1325 198 norma = 0.0 1 OPT +--------- 199 do 30 j = 1,n 2 3 SO *+-------- 200 do 20 i = 1,n 4 DD *! 201 init = mod(3125*init,65536) 4 DD *! 202 a(i,j) = (init - 32768.0)/16384.0 5 DD *! 203 norma = dmax1(dabs(a(i,j)), norma) *!________ 204 20 continue *_________ 205 30 continue 1 OPT +--------- 206 do 35 i = 1,n ! 207 b(i) = 0.0 !_________ 208 35 continue 2 3 LR SO +--------- 209 do 50 j = 1,n LR SO C !+-------- 210 do 40 i = 1,n 6 DD !* 211 b(i) = b(i) + a(i,j) !*________ 212 40 continue !_________ 213 50 continue 214 return 215 end Abbreviations Used OPT optimized LR loop reordering DD data dependence SO scalar optimization C concurrentized Footnote List 1: optimized Loop has been fused with others to reduce overhead. 2: scalar optimization Loop unrolled 4 times to improve scalar performance. 3: scalar optimization Cleanup loop for loop unrolling. 4: data dependence Data dependence involving this line due to variable "INIT". 5: data dependence Data dependence involving this line due to variable "NORMA". 6: data dependence Data dependence involving this line due to variable "B". PFA/SGI 10.0 k092805 910529 MATGEN Loop Summary 10-Aug-1993 10:08:55 Page 8 Loop Summary From To Loop Loop at Unroll Unroll Iteration Loop# line line label index nest weight factor workload Status 1 199 208 Do 30 J 2 2 4 scalar mode preferable 2 200 204 Do 20 I 3 16 4 unrolled 3 200 204 Do 20 I 3 16 4 unrolled 4 199 208 Do 30 J 2 2 4 scalar mode preferable 5 199 208 Do 30 J 2 2 4 2 concurrentized 6 210 212 Do 40 I 1 6 concurrentized 7 209 213 Do 50 J 2 4 4 unrolled; already in a parallel loop 8 209 213 Do 50 J 2 4 4 unrolled; already in a parallel loop 9 206 208 Do 35 I 1 unrolled completely or removed PFA/SGI 10.0 k092805 910529 DGEFA Source 10-Aug-1993 10:08:55 Page 9 Footnotes Actions DO Loops Line 216 subroutine dgefa(a,lda,n,ipvt,info) 217 integer lda,n,ipvt(1),info 218 double precision a(lda,1) 219 c 220 c dgefa factors a double precision matrix by gaussian elimination. 221 c 222 c dgefa is usually called by dgeco, but it can be called 223 c directly with a saving in time if rcond is not needed. 224 c (time for dgeco) = (1 + 9/n)*(time for dgefa) . 225 c 226 c on entry 227 c 228 c a double precision(lda, n) 229 c the matrix to be factored. 230 c 231 c lda integer 232 c the leading dimension of the array a . 233 c 234 c n integer 235 c the order of the matrix a . 236 c 237 c on return 238 c 239 c a an upper triangular matrix and the multipliers 240 c which were used to obtain it. 241 c the factorization can be written a = l*u where 242 c l is a product of permutation and unit lower 243 c triangular matrices and u is upper triangular. 244 c 245 c ipvt integer(n) 246 c an integer vector of pivot indices. 247 c 248 c info integer 249 c = 0 normal value. 250 c = k if u(k,k) .eq. 0.0 . this is not an error 251 c condition for this subroutine, but it does 252 c indicate that dgesl or dgedi will divide by zero 253 c if called. use rcond in dgeco for a reliable 254 c indication of singularity. 255 c 256 c linpack. this version dated 08/14/78 . 257 c cleve moler, university of new mexico, argonne national lab. 258 c 259 c subroutines and functions 260 c 261 c blas daxpy,dscal,idamax 262 c 263 c internal variables 264 c 265 double precision t 266 integer idamax,j,k,kp1,l,nm1 PFA/SGI 10.0 k092805 910529 DGEFA Source 10-Aug-1993 10:08:55 Page 10 267 c 268 c 269 c gaussian elimination with partial pivoting 270 c 271 info = 0 1 SO 272 nm1 = n - 1 SO 273 if (nm1 .lt. 1) go to 70 2 NO SO NCS +--------- 274 do 60 k = 1, nm1 1 SO ! 275 kp1 = k + 1 ! 276 c ! 277 c find l = pivot index ! 278 c ! 279 l = idamax(n-k+1,a(k,k),1) + k - 1 ! 280 ipvt(k) = l ! 281 c ! 282 c zero pivot implies this column already triangularized ! 283 c ! 284 if (a(l,k) .eq. 0.0d0) go to 40 ! 285 c ! 286 c interchange if necessary ! 287 c SO ! 288 if (l .eq. k) go to 10 ! 289 t = a(l,k) ! 290 a(l,k) = a(k,k) ! 291 a(k,k) = t ! 292 10 continue ! 293 c ! 294 c compute multipliers ! 295 c SO ! 296 t = -1.0d0/a(k,k) ! 297 call dscal(n-k,t,a(k+1,k),1) ! 298 c ! 299 c row elimination with column indexing ! 300 c SO NCS !+-------- 301 do 30 j = kp1, n !! 302 t = a(l,j) SO !! 303 if (l .eq. k) go to 20 !! 304 a(l,j) = a(k,j) !! 305 a(k,j) = t !! 306 20 continue 3 NO NCS !! 307 call daxpy(n-k,t,a(k+1,k),1,a(k+1,j),1) 3 NO !!________ 308 30 continue ! 309 go to 50 ! 310 40 continue ! 311 info = k ! 312 50 continue !_________ 313 60 continue 314 70 continue 315 ipvt(n) = n 316 if (a(n,n) .eq. 0.0d0) info = n 317 return 318 end PFA/SGI 10.0 k092805 910529 DGEFA Source 10-Aug-1993 10:08:55 Page 11 Abbreviations Used NO not optimized SO scalar optimization NCS non-concurrent-stmt Footnote List 1: scalar optimization Statement deleted because of scalar optimization. 2: not optimized No optimizable statements found. 3: not optimized Unoptimizable call to "DAXPY" found. PFA/SGI 10.0 k092805 910529 DGEFA Loop Summary 10-Aug-1993 10:08:55 Page 12 Loop Summary From To Loop Loop at Unroll Unroll Iteration Loop# line line label index nest weight factor workload Status 1 274 313 Do 60 K 1 no optimizable statements 2 301 308 Do 30 J 2 53 1 unrolled 3 303 303 Do 30 J 2 53 1 unrolled PFA/SGI 10.0 k092805 910529 DGESL Source 10-Aug-1993 10:08:55 Page 13 Footnotes Actions DO Loops Line 319 subroutine dgesl(a,lda,n,ipvt,b,job) 320 integer lda,n,ipvt(1),job 321 double precision a(lda,1),b(1) 322 c 323 c dgesl solves the double precision system 324 c a * x = b or trans(a) * x = b 325 c using the factors computed by dgeco or dgefa. 326 c 327 c on entry 328 c 329 c a double precision(lda, n) 330 c the output from dgeco or dgefa. 331 c 332 c lda integer 333 c the leading dimension of the array a . 334 c 335 c n integer 336 c the order of the matrix a . 337 c 338 c ipvt integer(n) 339 c the pivot vector from dgeco or dgefa. 340 c 341 c b double precision(n) 342 c the right hand side vector. 343 c 344 c job integer 345 c = 0 to solve a*x = b , 346 c = nonzero to solve trans(a)*x = b where 347 c trans(a) is the transpose. 348 c 349 c on return 350 c 351 c b the solution vector x . 352 c 353 c error condition 354 c 355 c a division by zero will occur if the input factor contains a 356 c zero on the diagonal. technically this indicates singularity 357 c but it is often caused by improper arguments or improper 358 c setting of lda . it will not occur if the subroutines are 359 c called correctly and if dgeco has set rcond .gt. 0.0 360 c or dgefa has set info .eq. 0 . 361 c 362 c to compute inverse(a) * c where c is a matrix 363 c with p columns 364 c call dgeco(a,lda,n,ipvt,rcond,z) 365 c if (rcond is too small) go to ... 366 c do 10 j = 1, p 367 c call dgesl(a,lda,n,ipvt,c(1,j),0) 368 c 10 continue 369 c PFA/SGI 10.0 k092805 910529 DGESL Source 10-Aug-1993 10:08:55 Page 14 370 c linpack. this version dated 08/14/78 . 371 c cleve moler, university of new mexico, argonne national lab. 372 c 373 c subroutines and functions 374 c 375 c blas daxpy,ddot 376 c 377 c internal variables 378 c 379 double precision ddot,t 380 integer k,kb,l,nm1 381 c 1 SO 382 nm1 = n - 1 SO 383 if (job .ne. 0) go to 50 384 c 385 c job = 0 , solve a * x = b 386 c first solve l*y = b 387 c SO 388 if (nm1 .lt. 1) go to 30 SO NCS +--------- 389 do 20 k = 1, nm1 1 SO ! 390 l = ipvt(k) SO ! 391 t = b(l) SO ! 392 if (l .eq. k) go to 10 SO ! 393 b(l) = b(k) ! 394 b(k) = t ! 395 10 continue 2 NO NCS ! 396 call daxpy(n-k,t,a(k+1,k),1,b(k+1),1) 2 NO !_________ 397 20 continue 398 30 continue 399 c 400 c now solve u*x = y 401 c NCS +--------- 402 do 40 kb = 1, n ! 403 k = n + 1 - kb ! 404 b(k) = b(k)/a(k,k) SO ! 405 t = -b(k) 2 NO NCS ! 406 call daxpy(k-1,t,a(1,k),1,b(1),1) 2 NO !_________ 407 40 continue 408 go to 100 409 50 continue 410 c 411 c job = nonzero, solve trans(a) * x = b 412 c first solve trans(u)*y = b 413 c NCS +--------- 414 do 60 k = 1, n 3 NO NCS ! 415 t = ddot(k-1,a(1,k),1,b(1),1) ! 416 b(k) = (b(k) - t)/a(k,k) 3 NO !_________ 417 60 continue 418 c 419 c now solve trans(l)*x = y 420 c SO 421 if (nm1 .lt. 1) go to 90 SO NCS +--------- 422 do 80 kb = 1, nm1 PFA/SGI 10.0 k092805 910529 DGESL Source 10-Aug-1993 10:08:55 Page 15 1 SO ! 423 k = n - kb 3 NO SO NCS ! 424 b(k) = b(k) + ddot(n-k,a(k+1,k),1,b(k+1),1) 1 SO ! 425 l = ipvt(k) SO ! 426 if (l .eq. k) go to 70 SO ! 427 t = b(l) SO ! 428 b(l) = b(k) SO ! 429 b(k) = t ! 430 70 continue 3 NO !_________ 431 80 continue 432 90 continue 433 100 continue 434 return 435 end Abbreviations Used NO not optimized SO scalar optimization NCS non-concurrent-stmt Footnote List 1: scalar optimization Statement deleted because of scalar optimization. 2: not optimized Unoptimizable call to "DAXPY" found. 3: not optimized Unoptimizable call to "DDOT" found. PFA/SGI 10.0 k092805 910529 DGESL Loop Summary 10-Aug-1993 10:08:55 Page 16 Loop Summary From To Loop Loop at Unroll Unroll Iteration Loop# line line label index nest weight factor workload Status 1 389 397 Do 20 K 1 62 1 unoptimizable call (DAXPY) 2 402 407 Do 40 KB 1 62 1 unoptimizable call (DAXPY) 3 414 417 Do 60 K 1 58 1 unoptimizable call (DDOT) 4 422 431 Do 80 KB 1 74 1 unoptimizable call (DDOT) PFA/SGI 10.0 k092805 910529 DAXPY Source 10-Aug-1993 10:08:55 Page 17 Footnotes Actions DO Loops Line 436 subroutine daxpy(n,da,dx,incx,dy,incy) 437 c 438 c constant times a vector plus a vector. 439 c jack dongarra, linpack, 3/11/78. 440 c 441 double precision dx(1),dy(1),da 442 integer i,incx,incy,ix,iy,m,mp1,n 443 c SO 444 if(n.le.0)return 445 if (da .eq. 0.0d0) return SO 446 if(incx.eq.1.and.incy.eq.1)go to 20 447 c 448 c code for unequal increments or equal increments 449 c not equal to 1 450 c 451 ix = 1 452 iy = 1 SO 453 if(incx.lt.0)ix = (-n+1)*incx + 1 SO 454 if(incy.lt.0)iy = (-n+1)*incy + 1 1 2 3 Q SO C +--------- 455 do 10 i = 1,n 4 DD * 456 dy(iy) = dy(iy) + da*dx(ix) * 457 ix = ix + incx * 458 iy = iy + incy *_________ 459 10 continue 460 return 461 c 462 c code for both increments equal to 1 463 c 464 20 continue 1 2 SO C +--------- 465 do 30 i = 1,n * 466 dy(i) = dy(i) + da*dx(i) *_________ 467 30 continue 468 return 469 end Abbreviations Used DD data dependence Q question SO scalar optimization C concurrentized Footnote List 1: scalar optimization Loop unrolled 4 times to improve scalar performance. 2: scalar optimization Cleanup loop for loop unrolling. 3: question Is "INCY .EQ. 0" in the loop beginning at this statement? 4: data dependence Data dependence involving this line due to variable "DY". PFA/SGI 10.0 k092805 910529 DAXPY Loop Summary 10-Aug-1993 10:08:55 Page 18 Loop Summary From To Loop Loop at Unroll Unroll Iteration Loop# line line label index nest weight factor workload Status 1 455 459 Do 10 I 1 11 4 scalar mode preferable 2 455 459 Do 10 I 1 11 4 scalar mode preferable 3 455 459 Do 10 I 1 11 4 scalar mode preferable 4 455 459 Do 10 I 1 11 4 20 concurrentized 5 465 467 Do 30 I 1 6 4 scalar mode preferable 6 465 467 Do 30 I 1 6 4 6 concurrentized PFA/SGI 10.0 k092805 910529 DDOT Source 10-Aug-1993 10:08:55 Page 19 Footnotes Actions DO Loops Line 470 double precision function ddot(n,dx,incx,dy,incy) 471 c 472 c forms the dot product of two vectors. 473 c jack dongarra, linpack, 3/11/78. 474 c 475 double precision dx(1),dy(1),dtemp 476 integer i,incx,incy,ix,iy,m,mp1,n 477 c 478 ddot = 0.0d0 479 dtemp = 0.0d0 SO 480 if(n.le.0)return SO 481 if(incx.eq.1.and.incy.eq.1)go to 20 482 c 483 c code for unequal increments or equal increments 484 c not equal to 1 485 c 486 ix = 1 487 iy = 1 SO 488 if(incx.lt.0)ix = (-n+1)*incx + 1 SO 489 if(incy.lt.0)iy = (-n+1)*incy + 1 1 2 SO +--------- 490 do 10 i = 1,n 3 DD ! 491 dtemp = dtemp + dx(ix)*dy(iy) ! 492 ix = ix + incx ! 493 iy = iy + incy !_________ 494 10 continue 495 ddot = dtemp 496 return 497 c 498 c code for both increments equal to 1 499 c 500 20 continue 1 2 SO +--------- 501 do 30 i = 1,n 3 DD ! 502 dtemp = dtemp + dx(i)*dy(i) !_________ 503 30 continue 504 ddot = dtemp 505 return 506 end Abbreviations Used DD data dependence SO scalar optimization Footnote List 1: scalar optimization Loop unrolled 4 times to improve scalar performance. 2: scalar optimization Cleanup loop for loop unrolling. 3: data dependence Data dependence involving this line due to variable "DTEMP". PFA/SGI 10.0 k092805 910529 DDOT Loop Summary 10-Aug-1993 10:08:55 Page 20 Loop Summary From To Loop Loop at Unroll Unroll Iteration Loop# line line label index nest weight factor workload Status 1 490 494 Do 10 I 1 9 4 unrolled 2 490 494 Do 10 I 1 9 4 unrolled 3 501 503 Do 30 I 1 5 4 unrolled 4 501 503 Do 30 I 1 5 4 unrolled PFA/SGI 10.0 k092805 910529 DSCAL Source 10-Aug-1993 10:08:55 Page 21 Footnotes Actions DO Loops Line 507 subroutine dscal(n,da,dx,incx) 508 c 509 c scales a vector by a constant. 510 c jack dongarra, linpack, 3/11/78. 511 c 512 double precision da,dx(1) 513 integer i,incx,m,mp1,n,nincx 514 c SO 515 if(n.le.0)return SO 516 if(incx.eq.1)go to 20 517 c 518 c code for increment not equal to 1 519 c 1 SO 520 nincx = n*incx SO C +--------- 521 do 10 i = 1,nincx,incx * 522 dx(i) = da*dx(i) *_________ 523 10 continue 524 return 525 c 526 c code for increment equal to 1 527 c 528 20 continue 2 3 SO C +--------- 529 do 30 i = 1,n * 530 dx(i) = da*dx(i) *_________ 531 30 continue 532 return 533 end Abbreviations Used SO scalar optimization C concurrentized Footnote List 1: scalar optimization Statement deleted because of scalar optimization. 2: scalar optimization Loop unrolled 4 times to improve scalar performance. 3: scalar optimization Cleanup loop for loop unrolling. PFA/SGI 10.0 k092805 910529 DSCAL Loop Summary 10-Aug-1993 10:08:55 Page 22 Loop Summary From To Loop Loop at Unroll Unroll Iteration Loop# line line label index nest weight factor workload Status 1 521 523 Do 10 I 1 4 4 4 concurrentized 2 529 531 Do 30 I 1 4 4 scalar mode preferable 3 529 531 Do 30 I 1 4 4 4 concurrentized PFA/SGI 10.0 k092805 910529 IDAMAX Source 10-Aug-1993 10:08:55 Page 23 Footnotes Actions DO Loops Line 534 integer function idamax(n,dx,incx) 535 c 536 c finds the index of element having max. dabsolute value. 537 c jack dongarra, linpack, 3/11/78. 538 c 539 double precision dx(1),dmax 540 integer i,incx,ix,n 541 c 542 idamax = 0 SO 543 if( n .lt. 1 ) return 544 idamax = 1 SO 545 if(n.eq.1)return SO 546 if(incx.eq.1)go to 20 547 c 548 c code for increment not equal to 1 549 c 1 SO 550 ix = 1 551 dmax = dabs(dx(1)) SO 552 ix = ix + incx 2 3 SO +--------- 553 do 10 i = 2,n 4 DD SO ! 554 if(dabs(dx(ix)).le.dmax) go to 5 5 DD ! 555 idamax = i 4 DD SO ! 556 dmax = dabs(dx(ix)) SO ! 557 5 ix = ix + incx !_________ 558 10 continue 559 return 560 c 561 c code for increment equal to 1 562 c 563 20 dmax = dabs(dx(1)) 2 3 SO +--------- 564 do 30 i = 2,n 4 DD ! 565 if(dabs(dx(i)).le.dmax) go to 30 5 DD ! 566 idamax = i 4 DD ! 567 dmax = dabs(dx(i)) !_________ 568 30 continue 569 return 570 end Abbreviations Used DD data dependence SO scalar optimization Footnote List 1: scalar optimization Statement deleted because of scalar optimization. 2: scalar optimization Loop unrolled 4 times to improve scalar performance. 3: scalar optimization Cleanup loop for loop unrolling. 4: data dependence Data dependence involving this line due to variable "DMAX". 5: data dependence Data dependence involving this line due to variable "IDAMAX". PFA/SGI 10.0 k092805 910529 IDAMAX Loop Summary 10-Aug-1993 10:08:55 Page 24 Loop Summary From To Loop Loop at Unroll Unroll Iteration Loop# line line label index nest weight factor workload Status 1 553 558 Do 10 I 1 12 4 unrolled 2 553 558 Do 10 I 1 12 4 unrolled 3 564 568 Do 30 I 1 10 4 unrolled 4 564 568 Do 30 I 1 10 4 unrolled PFA/SGI 10.0 k092805 910529 EPSLON Source 10-Aug-1993 10:08:55 Page 25 Footnotes Actions DO Loops Line 571 double precision function epslon (x) 572 double precision x 573 c 574 c estimate unit roundoff in quantities of size x. 575 c 576 double precision a,b,c,eps 577 c 578 c this program should function properly on all systems 579 c satisfying the following two assumptions, 580 c 1. the base used in representing dfloating point 581 c numbers is not a power of three. 582 c 2. the quantity a in statement 10 is represented to 583 c the accuracy used in dfloating point variables 584 c that are stored in memory. 585 c the statement number 10 and the go to 10 are intended to 586 c force optimizing compilers to generate code satisfying 587 c assumption 2. 588 c under these assumptions, it should be true that, 589 c a is not exactly equal to four-thirds, 590 c b has a zero for its last bit or digit, 591 c c is not exactly equal to one, 592 c eps measures the separation of 1.0 from 593 c the next larger dfloating point number. 594 c the developers of eispack would appreciate being informed 595 c about any systems where these assumptions do not hold. 596 c 597 c ***************************************************************** 598 c this routine is one of the auxiliary routines used by eispack iii 599 c to avoid machine dependencies. 600 c ***************************************************************** 601 c 602 c this version dated 4/6/83. 603 c 604 a = 4.0d0/3.0d0 605 10 b = a - 1.0d0 606 c = b + b + b 607 eps = dabs(c-1.0d0) 608 if (eps .eq. 0.0d0) go to 10 609 epslon = eps*dabs(x) 610 return 611 end PFA/SGI 10.0 k092805 910529 EPSLON Loop Summary 10-Aug-1993 10:08:55 Page 26 Loop Summary From To Loop Loop at Unroll Unroll Iteration Loop# line line label index nest weight factor workload Status 1 605 608 Do 1 optimization disabled PFA/SGI 10.0 k092805 910529 DMXPY Source 10-Aug-1993 10:08:55 Page 27 Footnotes Actions DO Loops Line 612 subroutine dmxpy (n1, y, n2, ldm, x, m) 613 double precision y(*), x(*), m(ldm,*) 614 c 615 c purpose: 616 c multiply matrix m times vector x and add the result to vector y. 617 c 618 c parameters: 619 c 620 c n1 integer, number of elements in vector y, and number of rows in 621 c matrix m 622 c 623 c y double precision(n1), vector of length n1 to which is added 624 c the product m*x 625 c 626 c n2 integer, number of elements in vector x, and number of columns 627 c in matrix m 628 c 629 c ldm integer, leading dimension of array m 630 c 631 c x double precision(n2), vector of length n2 632 c 633 c m double precision(ldm,n2), matrix of n1 rows and n2 columns 634 c 635 c ---------------------------------------------------------------------- 636 c 637 c cleanup odd vector 638 c 1 SO 639 j = mod(n2,2) SO 640 if (j .ge. 1) then 2 3 SO C +--------- 641 do 10 i = 1, n1 SO * 642 y(i) = (y(i)) + x(j)*m(i,j) *_________ 643 10 continue 644 endif 645 c 646 c cleanup odd group of two vectors 647 c 1 SO 648 j = mod(n2,4) SO 649 if (j .ge. 2) then 2 3 SO C +--------- 650 do 20 i = 1, n1 SO * 651 y(i) = ( (y(i)) * 652 $ + x(j-1)*m(i,j-1)) + x(j)*m(i,j) *_________ 653 20 continue 654 endif 655 c 656 c cleanup odd group of four vectors 657 c 1 SO 658 j = mod(n2,8) SO 659 if (j .ge. 4) then 2 3 SO C +--------- 660 do 30 i = 1, n1 SO * 661 y(i) = ((( (y(i)) * 662 $ + x(j-3)*m(i,j-3)) + x(j-2)*m(i,j-2)) PFA/SGI 10.0 k092805 910529 DMXPY Source 10-Aug-1993 10:08:55 Page 28 * 663 $ + x(j-1)*m(i,j-1)) + x(j) *m(i,j) *_________ 664 30 continue 665 endif 666 c 667 c cleanup odd group of eight vectors 668 c 1 SO 669 j = mod(n2,16) SO 670 if (j .ge. 8) then 3 4 SO C +--------- 671 do 40 i = 1, n1 SO * 672 y(i) = ((((((( (y(i)) * 673 $ + x(j-7)*m(i,j-7)) + x(j-6)*m(i,j-6)) * 674 $ + x(j-5)*m(i,j-5)) + x(j-4)*m(i,j-4)) * 675 $ + x(j-3)*m(i,j-3)) + x(j-2)*m(i,j-2)) * 676 $ + x(j-1)*m(i,j-1)) + x(j) *m(i,j) *_________ 677 40 continue 678 endif 679 c 680 c main loop - groups of sixteen vectors 681 c SO 682 jmin = j+16 LR +--------- 683 do 60 j = jmin, n2, 16 LR C !+-------- 684 do 50 i = 1, n1 5 DD !* 685 y(i) = ((((((((((((((( (y(i)) !* 686 $ + x(j-15)*m(i,j-15)) + x(j-14)*m(i,j-14)) !* 687 $ + x(j-13)*m(i,j-13)) + x(j-12)*m(i,j-12)) !* 688 $ + x(j-11)*m(i,j-11)) + x(j-10)*m(i,j-10)) !* 689 $ + x(j- 9)*m(i,j- 9)) + x(j- 8)*m(i,j- 8)) !* 690 $ + x(j- 7)*m(i,j- 7)) + x(j- 6)*m(i,j- 6)) !* 691 $ + x(j- 5)*m(i,j- 5)) + x(j- 4)*m(i,j- 4)) !* 692 $ + x(j- 3)*m(i,j- 3)) + x(j- 2)*m(i,j- 2)) !* 693 $ + x(j- 1)*m(i,j- 1)) + x(j) *m(i,j) !*________ 694 50 continue !_________ 695 60 continue 696 return 697 end Abbreviations Used LR loop reordering DD data dependence SO scalar optimization C concurrentized Footnote List 1: scalar optimization Statement deleted because of scalar optimization. 2: scalar optimization Loop unrolled 4 times to improve scalar performance. 3: scalar optimization Cleanup loop for loop unrolling. 4: scalar optimization Loop unrolled 2 times to improve scalar performance. 5: data dependence Data dependence involving this line due to variable "Y". PFA/SGI 10.0 k092805 910529 DMXPY Loop Summary 10-Aug-1993 10:08:55 Page 29 Loop Summary From To Loop Loop at Unroll Unroll Iteration Loop# line line label index nest weight factor workload Status 1 641 643 Do 10 I 1 7 4 scalar mode preferable 2 641 643 Do 10 I 1 7 4 12 concurrentized 3 650 653 Do 20 I 1 12 4 scalar mode preferable 4 650 653 Do 20 I 1 12 4 23 concurrentized 5 660 664 Do 30 I 1 22 4 scalar mode preferable 6 660 664 Do 30 I 1 22 4 45 concurrentized 7 671 677 Do 40 I 1 42 2 scalar mode preferable 8 671 677 Do 40 I 1 42 2 89 concurrentized 9 684 694 Do 50 I 1 concurrentized 10 683 695 Do 60 J 2 111 1 already in a parallel loop PFA/SGI 10.0 k092805 910529 _MAIN Source 10-Aug-1993 10:08:55 Page 30 Footnotes Actions DO Loops Line 698 699